######################################################################
### Replication Materials for Parallel Trends and Placebo Tests for DiD Analyses ###
######################################################################

#######################################
#Quasi-Poisson Models 
#######################################
# Start from an empty workspace so objects left over from an earlier session
# cannot leak into the analysis.
rm(list = ls())

# Run this script from the replication folder, i.e. the directory that holds
# "df.RDS". The original bare `setwd()` call supplied no path, which raises
# 'argument "dir" is missing' and aborts a non-interactive run; check for the
# input file instead and fail with an actionable message.
if (!file.exists("df.RDS")) {
  stop(
    "df.RDS not found in ", getwd(),
    "; set the working directory to the replication folder first.",
    call. = FALSE
  )
}
# Packages
pacman::p_load(rtweet,
               ggplot2,
               purrrlyr,
               dplyr,
               readxl,
               writexl,
               broom,
               tidyverse,
               dotwhisker,
               lubridate,
               stargazer,
               ggpubr, 
               jtools,
               rpart,
               caret,
               reshape2,
               ipred,
               RColorBrewer,
               rpart.plot,
               MASS,
               randomForest,
               dplyr,
               car,
               vtable,
               effects,
               interactions,
               doParallel,
               foreach
               
)

# Fix the RNG state so the placebo dates sampled further down are reproducible
# (611 is the same number as the literal 0611).
set.seed(611)

# Import data
df <- readRDS("df.RDS")

#########################
# Set Key Vars as Factors
#########################
# Coerce the conspiracy indicators, the officials flag and the month to
# factors in a single pass.
df <- df %>%
  mutate(across(
    all_of(c(
      "conspir.gen",
      "conspir.tusk",
      "conspir.po",
      "conspir.collusion",
      "officials",
      "month"
    )),
    as.factor
  ))

# Creating wartime variable 
# Wartime indicator: 1 when the tweet's created_at is later than "2022-02-24",
# 0 otherwise (missing timestamps stay NA). Stored as a factor so the models
# and filters below can refer to the levels "0" and "1".
df$War <- as.factor(as.numeric(df[["created_at"]] > "2022-02-24"))


##############
## Placebos ###
#############


#### First, selecting new 'treatment' dates 
# Draw 100 distinct false treatment dates (sampling without replacement) from
# the daily sequence 2021-10-11 .. 2022-02-23.
date <- seq(
  from = as.Date("2021/10/11"),
  to = as.Date("2022/02/23"),
  by = "day"
) %>%
  sample(size = 100)

print(date)

# Placebo tests: for every false treatment date in `date`, re-estimate each
# difference-in-differences specification on the pre-war observations only and
# keep the tidy coefficient table, tagged with the index of the draw (`Boot`).

# Outcome / conspiracy-indicator pair behind each results table.
placebo_specs <- list(
  tusk_fav = c(outcome = "favorite_count", conspiracy = "conspir.tusk"),
  tusk_rt  = c(outcome = "retweet_count",  conspiracy = "conspir.tusk"),
  po_fav   = c(outcome = "favorite_count", conspiracy = "conspir.po"),
  po_rt    = c(outcome = "retweet_count",  conspiracy = "conspir.po"),
  col_fav  = c(outcome = "favorite_count", conspiracy = "conspir.collusion"),
  col_rt   = c(outcome = "retweet_count",  conspiracy = "conspir.collusion")
)

# Subset data to pre-treatment. The filter does not depend on the placebo
# date, so it is done once instead of on every iteration.
df_pre <- df %>%
  filter(War == "0")

# Fit one quasi-Poisson placebo model and return its tidy coefficient table.
#   data       - pre-war data that already contains the logical `Placebo`
#   outcome    - name of the count outcome column
#   conspiracy - name of the conspiracy indicator interacted with `Placebo`
#   boot       - index of the placebo draw, stored in column `Boot`
fit_placebo <- function(data, outcome, conspiracy, boot) {
  model_formula <- as.formula(paste(
    outcome, "~", conspiracy,
    "* Placebo + log_follow + log_friends + verified + month"
  ))
  coefs <- tidy(glm(model_formula, data = data, family = "quasipoisson"))
  coefs$Boot <- boot
  coefs
}

# One list element per placebo date, each holding the six coefficient tables.
# seq_along() keeps the loop empty (rather than running over c(1, 0)) if
# `date` has length zero.
placebo_fits <- lapply(seq_along(date), function(i) {
  # The cutoff is passed as a "YYYY-MM-DD" string, exactly as the War
  # indicator is built, so the comparison is carried out in created_at's own
  # class. NOTE(review): comparing a POSIXct column directly with a Date does
  # not dispatch a common method and ends up comparing seconds with days,
  # which would make Placebo constant - confirm the class of created_at.
  df_placebo <- df_pre %>%
    mutate(Placebo = created_at > format(date[i]))

  lapply(placebo_specs, function(spec) {
    fit_placebo(df_placebo, spec[["outcome"]], spec[["conspiracy"]], i)
  })
})

# Stack the tables of one specification across all placebo dates. Binding once
# avoids growing a data frame with rbind() inside the loop; rev() keeps the
# original row order (latest draw first).
collect_fits <- function(spec_name) {
  as.data.frame(bind_rows(rev(lapply(placebo_fits, `[[`, spec_name))))
}

tusk_fav_df <- collect_fits("tusk_fav")
tusk_rt_df <- collect_fits("tusk_rt")
po_fav_df <- collect_fits("po_fav")
po_rt_df <- collect_fits("po_rt")
col_fav_df <- collect_fits("col_fav")
col_rt_df <- collect_fits("col_rt")

# Average the placebo results per model term: mean coefficient estimate and
# mean p-value across all false treatment dates, both rounded to 4 digits.
# Terms that could not be estimated in a given draw (NA) are ignored.
#   fits - stacked tidy coefficient tables with columns term, estimate, p.value
# Returns one row per term.
summarise_placebo <- function(fits) {
  fits %>%
    group_by(term) %>%
    summarise(
      estimate = round(mean(estimate, na.rm = TRUE), digits = 4),
      p.value = round(mean(p.value, na.rm = TRUE), digits = 4)
    )
}

tuskfavstats <- summarise_placebo(tusk_fav_df)
tuskrtstats <- summarise_placebo(tusk_rt_df)

pofavstats <- summarise_placebo(po_fav_df)
portstats <- summarise_placebo(po_rt_df)

colfavstats <- summarise_placebo(col_fav_df)
colrtstats <- summarise_placebo(col_rt_df)

# Print the averaged placebo results as tables and save them to file.
# Only the collusion summaries are wired in here; to export another model,
# swap the first argument (e.g. tuskfavstats, pofavstats) and the `out` path.
# The "Figures for Paper" directory is expected to exist already.
# NOTE(review): the input is a data frame printed as-is (summary = F), so
# model-oriented arguments such as dep.var.caption, model.names and omit may
# have no effect here - confirm against the stargazer documentation.
# NOTE(review): type = "text" is combined with an .htm output file; check
# which format ends up in the saved file.

# Likes (favorite_count) - collusion conspiracy placebo
stargazer(colfavstats,
  column.sep.width = "-15pt",
  title = "Tweet Characteristics and Frequency of Likes - Placebo- quasi-Poisson",
  dep.var.caption = c("Number of Likes"),
  model.names = FALSE,
  type = "text",
  out = "Figures for Paper/placebo_favs_quasi.htm",
  omit = c("month10", 
           "month11",
           "month2",
           "month12",
           "month8",
           "month9"), # intended to hide the month fixed effects for readability
  summary = F, 
  notes = c("Results from 100 simulations of false treatment dates"),
  align=T, digits=4, no.space=T, flip=F)


# Retweets (retweet_count) - collusion conspiracy placebo
stargazer(colrtstats,
          column.sep.width = "-15pt",
          title = "Tweet Characteristics and Frequency of Retweets - Placebo- quasi-Poisson",
          dep.var.caption = c("Number of Retweets"),
          model.names = FALSE,
          type = "text",
          out = "Figures for Paper/placebo_rts_quasi.htm",
          omit = c("month"), # intended to hide the month fixed effects for readability
         summary = F, 
          notes = c("Results from 100 simulations of false treatment dates"),
          align=T, digits=4, no.space=T, flip=F)


###### Plotting Parallel Trends ######


# Put the months in the order used on the plot axis: August through December
# first, then January through July.
df$month <- factor(
  df$month,
  levels = as.character(c(8:12, 1:7)),
  ordered = TRUE
)


# Build one parallel-trends figure: monthly boxplots of the two engagement
# measures, split by whether the tweet carries the given conspiracy indicator.
#   data       - the tweet-level data frame
#   conspiracy - name of the conspiracy indicator column
#   label      - short label used in the legend and the plot title
# Returns a ggplot object.
plot_parallel_trends <- function(data, conspiracy, label) {
  data %>%
    filter(!is.na(.data[[conspiracy]])) %>%
    # Columns 4 and 5 are stacked into one `value` column, one facet each.
    # NOTE(review): selected by position - presumably the like and retweet
    # counts; confirm against the column order of df.RDS.
    pivot_longer(cols = 4:5, names_to = "Variable") %>%
    ggplot(aes(x = month, y = value, color = factor(.data[[conspiracy]]))) +
    facet_grid(~ Variable) +
    geom_boxplot(alpha = .2) +
    theme_classic() +
    # Zoom the axis instead of using ylim(): ylim() discards every observation
    # above 200 before the boxplot statistics are computed, which distorts the
    # medians and quartiles; coord_cartesian() only restricts the visible range.
    coord_cartesian(ylim = c(0, 200)) +
    scale_color_grey() +
    labs(color = paste(label, "Conspiracy"),
         title = paste("Parallel Trends -", label, "Conspiracy"))
}

par.po <- plot_parallel_trends(df, "conspir.po", "PO")

#ggsave("parallel_po.jpg", par.po)


par.col <- plot_parallel_trends(df, "conspir.collusion", "Collusion")

#ggsave("parallel_col.jpg", par.col)

par.tusk <- plot_parallel_trends(df, "conspir.tusk", "Tusk")

par.tusk
#ggsave("parallel_tusk.jpg", par.tusk)


